# !pip install dataidea==0.2.5
ANOVA for Feature Selection
import scipy as sp
from sklearn.feature_selection import SelectKBest
from dataidea.datasets import loadDataset
# Load the inbuilt 'fpl' dataset shipped with dataidea.
fpl = loadDataset('fpl')
# Preview the top 5 rows.
fpl.head(n=5)
First_Name | Second_Name | Club | Goals_Scored | Assists | Total_Points | Minutes | Saves | Goals_Conceded | Creativity | Influence | Threat | Bonus | BPS | ICT_Index | Clean_Sheets | Red_Cards | Yellow_Cards | Position | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Bruno | Fernandes | MUN | 18 | 14 | 244 | 3101 | 0 | 36 | 1414.9 | 1292.6 | 1253 | 36 | 870 | 396.2 | 13 | 0 | 6 | MID |
1 | Harry | Kane | TOT | 23 | 14 | 242 | 3083 | 0 | 39 | 659.1 | 1318.2 | 1585 | 40 | 880 | 355.9 | 12 | 0 | 1 | FWD |
2 | Mohamed | Salah | LIV | 22 | 6 | 231 | 3077 | 0 | 41 | 825.7 | 1056.0 | 1980 | 21 | 657 | 385.8 | 11 | 0 | 0 | MID |
3 | Heung-Min | Son | TOT | 17 | 11 | 228 | 3119 | 0 | 36 | 1049.9 | 1052.2 | 1046 | 26 | 777 | 315.2 | 13 | 0 | 0 | MID |
4 | Patrick | Bamford | LEE | 17 | 11 | 194 | 3052 | 0 | 50 | 371.0 | 867.2 | 1512 | 26 | 631 | 274.6 | 10 | 0 | 3 | FWD |
# Create groups of goals scored for each player position.
# Each group is the 'Goals_Scored' column restricted to the rows whose
# 'Position' matches the given position code.
forwards_goals = fpl[fpl.Position == 'FWD']['Goals_Scored']
midfielders_goals = fpl[fpl.Position == 'MID']['Goals_Scored']
defenders_goals = fpl[fpl.Position == 'DEF']['Goals_Scored']
goalkeepers_goals = fpl[fpl.Position == 'GK']['Goals_Scored']
# Perform the ANOVA test for the groups.
# f_oneway takes one sample per group and returns the F statistic together
# with its p-value; here the groups are goals scored per position.
f_statistic, p_value = sp.stats.f_oneway(
    forwards_goals,
    midfielders_goals,
    defenders_goals,
    goalkeepers_goals,
)
print("F-statistic:", f_statistic)
print("p-value:", p_value)
F-statistic: 33.281034594400445
p-value: 3.9257634156019246e-20
# Create groups of assists for each player position.
# Each group is the 'Assists' column restricted to the rows whose
# 'Position' matches the given position code.
forwards_assists = fpl[fpl.Position == 'FWD']['Assists']
midfielders_assists = fpl[fpl.Position == 'MID']['Assists']
defenders_assists = fpl[fpl.Position == 'DEF']['Assists']
goalkeepers_assists = fpl[fpl.Position == 'GK']['Assists']
# Perform the ANOVA test for the groups.
# f_oneway takes one sample per group and returns the F statistic together
# with its p-value; here the groups are assists per position.
f_statistic, p_value = sp.stats.f_oneway(
    forwards_assists,
    midfielders_assists,
    defenders_assists,
    goalkeepers_assists,
)
print("F-statistic:", f_statistic)
print("p-value:", p_value)
F-statistic: 19.263717036430815
p-value: 5.124889288362087e-12
# Use scikit-learn's SelectKBest (with f_classif).
# No score_func is passed, so the selector relies on its default scoring
# function; k=1 keeps only the single highest-scoring feature.
test = SelectKBest(k=1)

# Score both candidate features against the player position labels.
fit = test.fit(fpl[['Goals_Scored', 'Assists']], fpl.Position)
scores = fit.scores_

# Reduce the feature matrix to the selected column(s).
features = fit.transform(fpl[['Goals_Scored', 'Assists']])

# Integer positions (within the input columns) of the selected features.
selected_indices = test.get_support(indices=True)

print('Feature Scores: ', scores)
print('Selected Features Indices: ', selected_indices)
Feature Scores: [33.28103459 19.26371704]
Selected Features Indices: [0]